# install.packages ('gapminder')
library(gapminder)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
alt+- will add <-
shift+ctrl+c to add # in front of a line (comment it out)
‘----’ at the end of a comment makes a header, so it is easy to navigate through the script
command +shift + m
ctrl+alt+i for new code chunk # syntax Plain text
end a line with two spaces to start a new paragraph.
italics and italics
bold and bold
superscript2
~strikethrough
link to rstudio
1==1 # equality
1!=3 #unequal
13<14 #13 smaller than 14
14>13 #14 bigger than 13
12>=0 #12 greater or equal to zero
12<=3 #12 smaller or equal to zero
family
name <- c('saneesh', 'sanusha', 'appu', 'kishan')
weight <- c(63,48, 20, NA)
height <- c(164, 150, NA, 75)
family <- data.frame(name, weight, height)
family %>% as_tibble()
library(tidyverse)
# Simulated scores for 10 females and 8 males, each drawn from its own
# normal distribution.
data <- data.frame(
  sex = rep(c('female', 'male'), times = c(10, 8)),
  score = c(
    rnorm(n = 10, mean = 7.56, sd = 1.978),
    rnorm(n = 8, mean = 7.75, sd = 1.631)
  )
)
data
# Number of rows per sex (stored in `score`) and its share of the total in %.
data %>%
  group_by(sex) %>%
  summarise(score = n()) %>%
  mutate(freq = score / sum(score) * 100)
library(tidyverse)
years <- tribble(
~Location, ~Year, ~Month, ~Day, ~Lenght,
"Sydney", 2000, 9, 15,12.1213,
"Athens", 2004, 8, 13, 12.1212,
"Beijing", 2008, 8, 8,13.212,
"London", 2012, 7, 27,13.1212,
"Rio de Janeiro", 2016, 8, 5,65.00
)
# run previous code chunk
library(gt)
years %>% gt()
| Location | Year | Month | Day | Lenght |
|---|---|---|---|---|
| Sydney | 2000 | 9 | 15 | 12.1213 |
| Athens | 2004 | 8 | 13 | 12.1212 |
| Beijing | 2008 | 8 | 8 | 13.2120 |
| London | 2012 | 7 | 27 | 13.1212 |
| Rio de Janeiro | 2016 | 8 | 5 | 65.0000 |
years %>%
mutate(Lenght= round(Lenght, 2)) %>%
gt() %>%
tab_options(column_labels.font.size = 11,
column_labels.font.weight = "bold",
table.font.size = 10,
) %>%
opt_table_outline(style = "solid", width = px(2))
| Location | Year | Month | Day | Lenght |
|---|---|---|---|---|
| Sydney | 2000 | 9 | 15 | 12.12 |
| Athens | 2004 | 8 | 13 | 12.12 |
| Beijing | 2008 | 8 | 8 | 13.21 |
| London | 2012 | 7 | 27 | 13.12 |
| Rio de Janeiro | 2016 | 8 | 5 | 65.00 |
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# HairEyeColor is a contingency table; as a data frame it has one row per
# Hair/Eye/Sex combination and the observed count in `Freq`.
data <- data.frame(HairEyeColor)
# tabyl() counts rows, so expand every row `Freq` times first. Without this
# each Hair/Eye cell is just 2 (one row per sex) and every percentage is 25%.
data %>%
  uncount(Freq) %>%
  tabyl(Hair, Eye) %>%
  adorn_percentages('row') %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns() %>%
  knitr::kable()
| Hair | Brown | Blue | Hazel | Green |
|---|---|---|---|---|
| Black | 62.96% (68) | 18.52% (20) | 13.89% (15) | 4.63% (5) |
| Brown | 41.61% (119) | 29.37% (84) | 18.88% (54) | 10.14% (29) |
| Red | 36.62% (26) | 23.94% (17) | 19.72% (14) | 19.72% (14) |
| Blond | 5.51% (7) | 74.02% (94) | 7.87% (10) | 12.60% (16) |
# identify location of NAs in vector
which(is.na(family))
## [1] 8 11
colSums(is.na(family))
## name weight height
## 0 1 1
mat <- matrix(sample(c(NA, 1:5), 50, replace = TRUE), 5)
df <- as.data.frame(mat)
df %>% replace(is.na(.), 0)%>% view()
see spread & gather # clean names
# install.packages('janitor')
library(janitor)
id <- (c(1,1,2,2,3,3))
Country <- c('Angola', 'Angola','Botswana', 'Botswana','Zimbabwe','Zimbabwe')
year <- c('2006', '2007', '2008', '2009', '2010', '2006')
bank.ratio <- c(24,25,38,34,42,49)
Reserve.ratio <- c(77,59,64,65,57,86)
broad.money <- c(163,188,317,361,150,288)
bank <- data.frame(id, Country, year, bank.ratio, Reserve.ratio,broad.money)
# Open the data in the viewer, then print it as a tibble. as_tibble() needs
# the data frame piped into it; called on its own it has no input to convert.
bank %>% view()
bank %>% as_tibble()
bank <- bank %>% clean_names() # replaced . with _
glimpse(bank)
## Rows: 6
## Columns: 6
## $ id <dbl> 1, 1, 2, 2, 3, 3
## $ country <chr> "Angola", "Angola", "Botswana", "Botswana", "Zimbabwe", …
## $ year <chr> "2006", "2007", "2008", "2009", "2010", "2006"
## $ bank_ratio <dbl> 24, 25, 38, 34, 42, 49
## $ reserve_ratio <dbl> 77, 59, 64, 65, 57, 86
## $ broad_money <dbl> 163, 188, 317, 361, 150, 288
bank <- bank %>% clean_names() # replaced . with _
Filter the bank data frame so that it retains only the countries whose id belongs to a given set, e.g. keep only the countries with id 1 or 2.
bank %>%
filter(id%in% c(1,2)) %>%
as_tibble()
Summarise the funds available for each country.
bank %>%
group_by(country) %>%
summarise(fund=sum(broad_money)) %>%
as_tibble()
column: new name= old name
iris %>%
rename(S.len=Sepal.Length,
Sp.= Species) %>% head(3)
iris %>%
rename_with(tolower) %>% head(3)
# Keep two columns and lower-case their names. select() + rename_with()
# replaces the superseded select_at(vars(...), fn) form.
iris %>%
  select(Species, Petal.Length) %>%
  rename_with(tolower) %>%
  head(3)
library(tidyverse)
mtcars <- mtcars %>% as_tibble(rownames="cars")
library(tibble)
iris %>% add_column(ob_no=1:150) %>% head(5)
iris %>% as_tibble() %>% head(3)
summary(gapminder)
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
str(gapminder)
## tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num [1:1704] 28.8 30.3 32 34 36.1 ...
## $ pop : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num [1:1704] 779 821 853 836 740 ...
Change the name of an observation: mutate(variable = recode(variable, ‘old name’ = ‘new name’))
gapminder %>%
mutate(country=recode(country, 'India'='IND' )) %>%
filter(country=='IND') %>% head(3)
gapminder %>%
select(year, country, gdpPercap) %>% head(3)
msleep %>% select(starts_with("sleep")) %>% head(3)
iris %>% select(-Sepal.Length, -Species) %>% head(3)
or
iris %>% select(-c(Sepal.Length)) %>% head(3)
iris %>% select(!Sepal.Length) %>% head(3)
# ends_with: columns whose name ends in "length" (matching ignores case)
iris %>% select(ends_with('length')) %>% head(3)
# starts_with: columns whose name starts with "Sepal"
iris %>% select(starts_with('Sepal')) %>% head(3)
gapminder %>%
select(year, country, lifeExp) %>%
filter(country=="Eritrea", year>1950) %>% head(3)
gapminder %>% filter(country=="Canada") %>% head(3) # keep only the rows for Canada and show the first 3 observations
gapminder %>% filter(country!="Oman") %>% head(3) # keep every country except Oman and show the first 3 observations
iris %>% filter(Species!='setosa') %>% glimpse()
## Rows: 100
## Columns: 5
## $ Sepal.Length <dbl> 7.0, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5.0, 5.…
## $ Sepal.Width <dbl> 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.…
## $ Petal.Length <dbl> 4.7, 4.5, 4.9, 4.0, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.…
## $ Petal.Width <dbl> 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1.0, 1.3, 1.4, 1.0, 1.…
## $ Species <fct> versicolor, versicolor, versicolor, versicolor, versicolo…
iris %>% filter(!Species %in% c('setosa', 'versicolor')) %>% glimpse()
## Rows: 50
## Columns: 5
## $ Sepal.Length <dbl> 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.…
## $ Sepal.Width <dbl> 3.3, 2.7, 3.0, 2.9, 3.0, 3.0, 2.5, 2.9, 2.5, 3.6, 3.2, 2.…
## $ Petal.Length <dbl> 6.0, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.…
## $ Petal.Width <dbl> 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2.0, 1.…
## $ Species <fct> virginica, virginica, virginica, virginica, virginica, vi…
iris %>% filter(Petal.Width >=2 & Petal.Width <= 5) %>% glimpse()
## Rows: 29
## Columns: 5
## $ Sepal.Length <dbl> 6.3, 7.1, 6.5, 7.6, 7.2, 6.5, 6.8, 5.7, 5.8, 6.4, 7.7, 7.…
## $ Sepal.Width <dbl> 3.3, 3.0, 3.0, 3.0, 3.6, 3.2, 3.0, 2.5, 2.8, 3.2, 3.8, 2.…
## $ Petal.Length <dbl> 6.0, 5.9, 5.8, 6.6, 6.1, 5.1, 5.5, 5.0, 5.1, 5.3, 6.7, 6.…
## $ Petal.Width <dbl> 2.5, 2.1, 2.2, 2.1, 2.5, 2.0, 2.1, 2.0, 2.4, 2.3, 2.2, 2.…
## $ Species <fct> virginica, virginica, virginica, virginica, virginica, vi…
library(tidyverse)
# Rebuild from the packaged dataset: `mtcars` was overwritten earlier in this
# script with a tibble (car names moved to `cars`), which has no row names
# left, so rownames_to_column() would only give "1", "2", ... and 'Merc'
# would never match.
mtcars <- datasets::mtcars %>% rownames_to_column()
mtcars %>%
  filter(str_detect(rowname, 'Merc')) %>%
  head(3) # keep only the cars whose name contains 'Merc'
mtcars %>%
  filter(!str_detect(rowname, 'Merc')) %>%
  head(3) # keep everything except the 'Merc' cars
iris %>% pull(Species) %>% head(3) # returns vector values
## [1] setosa setosa setosa
## Levels: setosa versicolor virginica
iris %>% select(Species) %>% head (3) # returns a table with one column
iris %>% select(everything()) %>% head(3)
gapminder %>%
filter(country=="Oman" &
year>1980 &
year<=2000) %>% head(4)
gapminder %>%
select(country, year) %>%
filter(year>=1980, country=="India"|
country=="Oman"|
country=="Canada") %>% head(4)
gapminder %>% filter(country!="Oman") %>% head(3) # keep every country except Oman and show the first 3 observations
gapminder %>% filter(country %in% c('Hungary','Iceland', 'Mongolia')) %>% head(3)
target <- c('Hungary','Iceland', 'Mongolia')
gapminder %>% filter(country %in% target) %>% head (3)
friends <- data.frame(Names=c('Saneesh', 'Appu', 'Shruti', 'Aradhana', 'Arathi', 'James Bond'),
age=c(40,9, 25, 25, 25, 50))
# The data frame is `friends`.
# Its columns are Names and age.
# Column Names holds 'Saneesh', 'Appu', 'Shruti', 'Aradhana', 'Arathi', 'James Bond'.
# We want to keep only the rows for Appu and James Bond, so we create a vector with
# these names in it.
target <- c('Appu', 'James Bond') # and then filter on membership in that vector
friends %>% filter(Names %in% target)
# or compare against each name explicitly
friends %>% filter(Names== 'Appu'| Names== 'James Bond')
# or write the vector inline
friends %>% filter(Names %in% c('Appu', 'James Bond'))
gapminder %>%
select(-year,-pop) %>%
head(5)
# Three countries with the highest life expectancy in 2007.
# arrange() has no `decreasing` argument (it would be treated as one more
# sorting expression); descending order is requested with desc().
gapminder %>%
  filter(year == 2007) %>%
  group_by(country) %>%
  summarise(meanLE = mean(lifeExp)) %>%
  arrange(desc(meanLE)) %>%
  head(3)
# Three countries with the lowest life expectancy ever recorded.
# arrange() sorts in ascending order by default.
gapminder %>%
  group_by(country) %>%
  summarise(minLE = min(lifeExp)) %>%
  arrange(minLE) %>%
  head(3)
Group by continent, then summarise two things: first n = n(), the number of rows in each continent (the size of each group), and then the mean of the lifeExp variable.
gapminder %>%
group_by(continent) %>%
summarise(n=n(),
meanLife=mean(lifeExp))
gapminder %>%
group_by(continent) %>%
summarise(PopConti=sum(pop))
pets <- data.frame(names=c(rep('saneesh', 3), rep('appu', 2), 'sanusha'),
pet=c(rep('dog', 3), rep('cat', 2), 'tiger'), number=c(2,2,5,7,8,1),
size=c(rep('medium', 2), rep('small', 3), 'big'))
pets
library(tidyverse)
pets %>% group_by(pet, size) %>%
summarise(totalpet= sum(number))
## `summarise()` has grouped output by 'pet'. You can override using the `.groups`
## argument.
library(tidyverse)
plot <- c(rep(1,2), rep(2,4), rep(3,3))
bird <- c('a','b', 'a','b', 'c', 'd', 'a', 'b', 'c')
area <- c(rep(10,2), rep(5,4), rep(15,3))
birdlist <- data.frame(plot,bird,area)
birdlist
# summarize the following data frame to a summary table.
# option 1
birdlist %>%
group_by(plot) %>%
summarise(bird = n(), area = unique(area))
# option 2
birdlist %>%
count(plot, area, name = "bird")
gapminder %>%
summarise(mean(lifeExp))
# Smallest and largest life expectancy in the whole data set, as one row with
# named columns. range() returns two values, which made summarise() emit two
# unnamed rows; newer dplyr versions deprecate that.
gapminder %>%
  summarise(
    min_lifeExp = min(lifeExp),
    max_lifeExp = max(lifeExp)
  )
gapminder %>%
filter(country=="India") %>%
group_by(country) %>%
summarise(GDPmax=max(gdpPercap),
GDPmin=min(gdpPercap),
GDPmean=mean(gdpPercap))
iris %>% count(Species, name = 'how many')
mtcars %>%
count(am, name = 'number') %>%
as_tibble()
mtcars %>%
count(gear, name = 'no. gear')
library(tidyverse)
plot <- c(rep(1,2), rep(2,4), rep(3,3))
bird <- as.factor(c('a','b', 'a','b', 'c', 'd', 'a', 'b', 'c'))
area <- c(rep(10,2), rep(5,4), rep(15,3))
birdlist <- data.frame(plot,bird,area)
birdlist
#birdlist %>% group_by(plot, area) %>% mutate(count(bird))
birdlist %>%
group_by(plot, area) %>%
dplyr::summarize(bird = n(), # when summarize doesn't work directly use it (dplyr::)like this
.groups = "drop") # to summarize of a column with reference to two other variables.
treatment <- c(rep('ab',2), rep('bgrnf', 8), rep('bgpnf', 4))
site <- c('ab1', 'ab2',
rep('bgrnf1', 3),
rep('bgrnf2', 2),
'bgrnf3',
'bgrnf4',
'bgrnf5',
rep('bgpnf1', 2),
rep('bgpnf2', 2))
data <- data.frame(treatment, site)
library(tidyverse)
# Number of sites in each treatment. Sites are repeated across rows, so
# reduce to unique treatment/site pairs before counting; counting the raw
# rows would report observations, not sites.
data %>%
  distinct(treatment, site) %>%
  count(treatment, name = '#sites')
library(dplyr)
library(stringr)
feedback <- c('good_book', 'good_read', 'good_story', 'good for knowledge')
book <- c('ramayana', 'bible', 'encyclopedia', 'Mbharatha')
df <- data.frame(feedback, book)
df %>%
mutate(response = case_when(str_starts(feedback, 'good') ~ 'good')) %>%
select(book, response) %>% as_tibble()
text to columns
df <- data.frame(films = c("Spider_man", "James_bond", "Iron_man", "Bat_man"))
df
df1 <- df %>%
separate(films, c("a", "b"), sep='([_])')
df1
df1 %>% unite("names", a:b, remove=FALSE)
df1 <- data.frame(id=c(1:4) ,films = c("Spider_man", "James_bond", "Iron_man", "Bat_man"))
df2 <- data.frame(id=c(1:4) ,country= rep("us", 4))
df3 <- left_join(df1, df2, by="id")
We are making a wide format from long format in the first example. The second example is to make a long format from wide.
# the following is already in long format
# Long format: one row per student/subject pair.
classdata <- data.frame(
  studentname = c('captian', 'ant', 'james', 'spider', 'tony', 'bat', 'wonder'),
  subject = c('math', 'his', 'math', 'geo', 'his', 'geo', 'math'),
  grade = c('A+', 'B', 'B', 'A+', 'C', 'B+', 'C')
)
classdata %>% head()
# Long -> wide: one column per subject, filled with the grades.
# pivot_wider() replaces the superseded spread().
wide.class <- pivot_wider(
  classdata,             # name of the data frame
  names_from = subject,  # new columns to be made
  values_from = grade    # values to go into new columns
)
head(wide.class)
# Wide -> long: gather the subject columns back into subject/grade pairs.
# pivot_longer() replaces the superseded gather(); values_drop_na removes the
# empty student/subject combinations (what drop_na() did before).
pivot_longer(
  wide.class,                # name of the data frame
  cols = c(geo, his, math),  # from where values have to be gathered
  names_to = 'subject',      # name of the column to put column names into
  values_to = 'grade',       # name of the column to put values into
  values_drop_na = TRUE
)
df1 <- data.frame(id=c(1:4) ,films = c("Spider_man", "James_bond", "Iron_man", "Bat_man"))
df2<- data.frame(id=c(5:8) ,films = c("King Cong", "Silence of the lambs", "Intersteller", "Gravity"))
dplyr::bind_rows(df1, df2)
for multiple variables
library(tidyverse)
srno <- c(1:2)
film <- c("arabica", "robust")
rate <- c("good", "better")
lang_Eng <- c("yes", "yes")
films <- data.frame(srno, film, rate, lang_Eng)
str(films)
## 'data.frame': 2 obs. of 4 variables:
## $ srno : int 1 2
## $ film : chr "arabica" "robust"
## $ rate : chr "good" "better"
## $ lang_Eng: chr "yes" "yes"
films <- films %>%
mutate(across(c(rate, lang_Eng), as.factor))
str(films)
## 'data.frame': 2 obs. of 4 variables:
## $ srno : int 1 2
## $ film : chr "arabica" "robust"
## $ rate : Factor w/ 2 levels "better","good": 2 1
## $ lang_Eng: Factor w/ 1 level "yes": 1 1
select a key variable and everything or every other columns.
library(gapminder)
gapminder %>% select(pop, everything()) %>% head (3)
library(stringr)
data <- data.frame(Dose.Cm=c("d1", "D2", "D3"),
Len.km=c("High", 'low', 'Low'))
glimpse(data)
## Rows: 3
## Columns: 2
## $ Dose.Cm <chr> "d1", "D2", "D3"
## $ Len.km <chr> "High", "low", "Low"
data %>% mutate(Dose.Cm= tolower(Dose.Cm), Len.km=toupper(Len.km))
data <- data.frame(Dose.Cm=c("d1", "D2", "D3"),
Len.km=c("high", 'low', 'medium'))
data <- data %>% mutate(len= as.factor(Len.km))
glimpse(data)
## Rows: 3
## Columns: 3
## $ Dose.Cm <chr> "d1", "D2", "D3"
## $ Len.km <chr> "high", "low", "medium"
## $ len <fct> high, low, medium
data %>% mutate(len= fct_relevel(len, c('low', 'medium', 'high')))
This drops any non-numeric characters before or after the first number. The grouping mark specified by the locale is ignored inside the number.
library(tidyverse)
class <- c('8th', '9th', '10th')
students <- c('25-30', '35-41', '21-28')
school <- data.frame(class, students)
school
glimpse(school) # note: students holds binned ranges stored as text, so it is not numeric
## Rows: 3
## Columns: 2
## $ class <chr> "8th", "9th", "10th"
## $ students <chr> "25-30", "35-41", "21-28"
school %>% mutate(students= parse_number(students)) %>% glimpse()
## Rows: 3
## Columns: 2
## $ class <chr> "8th", "9th", "10th"
## $ students <dbl> 25, 35, 21
school %>% mutate(students= parse_number(students))
# students is now numeric: only the first number of each range was kept
library(tidyverse)
rawdata <- data.frame(species_1=rnorm(n = 40, mean = 300, sd = 18.5), species_2=rnorm(40, 305, 16.7))
data <- pivot_longer(data = rawdata, cols = species_1:species_2, names_to = 'species', values_to = 'weight')
#sthda.com/english/wiki/ggplot2-barplots-quick-start-guide-r-software-and-data-visualization
df <- data.frame(dose=c("D0.5", "D1", "D2"),
len=c(4.2, 10, 29.5))
library(ggplot2)
# Basic barplot
p<-ggplot(data=df, aes(x=dose, y=len)) +
geom_bar(stat="identity")
p
# Horizontal bar plot
# p + coord_flip()
# Change the width of bars
ggplot(data=df, aes(x=dose, y=len)) +
geom_bar(stat="identity", width=0.5)
# Change colors
ggplot(data=df, aes(x=dose, y=len)) +
geom_bar(stat="identity", color="blue", fill="white")
# Minimal theme + blue fill color
p<-ggplot(data=df, aes(x=dose, y=len)) +
geom_bar(stat="identity", fill="steelblue")+
theme_minimal()
p
df <- data.frame(dose=c("D0.5", "D1", "D2", 'pp', 'kk', 'rr'),
len=c(4.2, 10, 29.5, 12, 15, 23))
library(ggplot2)
ggplot(df, aes(len))+
geom_density()+
geom_vline(aes(xintercept = mean(len)), col='red', linetype= 'dashed')
library(ggplot2)
ggplot(iris, aes(Petal.Length, Petal.Width))+
geom_point()+
geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'
## raincloud plot
library(ggdist)
library(tidyverse)
library(tidyquant)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Raincloud plot of highway mileage by number of cylinders: a half violin
# (the cloud), a narrow boxplot and a dot plot (the rain), drawn horizontally.
mpg %>%
  filter(cyl %in% c(4, 6, 8)) %>%
  ggplot(aes(x = factor(cyl), y = hwy, fill = factor(cyl))) +
  # add half violin from `ggdist` package
  ggdist::stat_halfeye(
    # custom bandwidth
    adjust = 0.5,
    # move geom to right
    justification = -0.2,
    # remove slab interval
    .width = 0,
    point_color = NA
  ) +
  # add boxplot
  geom_boxplot(
    width = 0.12,
    # remove outliers
    outlier.colour = NA,
    alpha = 0.5
  ) +
  # add dot plots from `ggdist` package
  ggdist::stat_dots(
    # orientation of the plot
    side = 'left',
    # move geom to the left
    justification = 1.1,
    # adjust grouping of observation
    binwidth = 0.25
  ) +
  # adjust theme
  scale_fill_tq() +
  theme_tq() +
  # Labels follow the aesthetics, not the screen position: x is cyl and y is
  # hwy even though coord_flip() draws them on swapped axes.
  labs(
    title = 'raincloud plot',
    subtitle = 'showing bimodal distribution of 6 cylinder vehicles',
    x = 'cylinders',
    y = 'highway fuel efficiency'
  ) +
  coord_flip()
library(tidyverse)
# install.packages("hexbin")
class <- c(rep('10th', 8))
students <- c('10 to 15',
"15-20",
"17 to 24",
"20 to 25",
"25 to 30",
"30 to 40",
"45 to 47",
'50 to 55')
latitude <- c(11.50897246,
11.48323136,
11.48719031,
11.46366611,
11.41097322,
11.52111154,
11.44491386,
11.46569568)
longitude <- c(76.06032062,
76.06192685,
76.04266851,
76.04156575,
76.05075092,
76.02846331,
76.03084141,
76.01766216)
school <- data.frame(class, students, latitude, longitude)
# Hexagonal summary of student numbers by location. Longitude goes on the
# x axis and latitude on the y axis so the plot is oriented like a map.
# parse_number() keeps the first number of each text range.
school %>%
  mutate(students = parse_number(students)) %>%
  ggplot(aes(longitude, latitude, z = students)) +
  stat_summary_hex() +
  scale_fill_viridis_c(alpha = 0.8) +
  labs(fill = 'students', title = 'school students')
income.data <- data.frame(Village= c(rep('Chittor', 20),
rep('Bellari', 20)),
Income=c(rnorm(n = 20, mean = 1000, sd = 150),
rnorm(n = 20, mean = 1000, sd = 150)))
library(ggplot2)
ggplot(income.data, aes(Village, Income))+
geom_boxplot()+
stat_summary(geom = 'point',
fun= mean,
col= 'red')
# Simulated incomes for two villages, 20 households each.
income.data <- data.frame(
  Village = rep(c('Chittor', 'Bellari'), each = 20),
  Income = c(
    rnorm(n = 20, mean = 1000, sd = 150),
    rnorm(n = 20, mean = 1000, sd = 150)
  )
)
library(ggplot2)
# Mean income per village, computed from the data. The sample is random, so
# fixed positions would not match the means of the values actually drawn.
village.means <- aggregate(Income ~ Village, data = income.data, FUN = mean)
ggplot(income.data) +
  # overall mean (dashed)
  geom_vline(aes(xintercept = mean(Income)), linetype = 'dashed') +
  geom_density(aes(x = Income, color = Village)) +
  # per-village means (dotted), coloured like the matching density curve
  geom_vline(
    data = village.means,
    aes(xintercept = Income, color = Village),
    linetype = 'dotted',
    show.legend = FALSE
  )
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
data <- data.frame(
category=c("Poaceae",
'Fabaceae',
"Asteraceae",
"Acanthaceae",
'Rubiaceae',
'Euphorbiaceae',
'Others'),
count=c(18,15,8,4,4,3,17 )
)
fig <- data %>% plot_ly(labels= ~ category, values= ~ count)
fig <- fig %>% add_pie(hole= 0.4) %>%
layout(title= "Donut charts using Plotly", showlegend = T)
fig
library(tidyverse)
df <- tribble(
~gender, ~height,
'male', 12,
'male', 8,
'female',11.5,
'female',11
)
ggplot(df, aes( gender, height))+
geom_point()+
annotate(
geom = 'text',
x= 1.29,
y= 11.4,
label= 'short person',
color= 'red',
size= 3,
fontface= 'italic'
)+
annotate(
geom = 'segment',
x= 1.05, # starting point on x, this decides length
xend = 1.3, # end point on x, this decides length
y= 11.02, # starting point on y
yend = 11.3, # ending point on y
color= 'blue',
linetype= 'dashed'
)+
annotate(
geom = 'segment',
x= 1.95, # starting point on x, this decides length
xend = 1.3, # end point on x, this decides length
y= 8.2, # starting point on y
yend = 11.3, # ending point on y
color= 'blue',
linetype= 'dashed'
)
library(lubridate)
# month.abb is base R's built-in vector of abbreviated month names
# ("Jan" ... "Dec"), so no index sequence has to be built first.
months <- month.abb
temperature <- c(10, 12, 22, 32, 35, 30, 33, 28, 29, 25, 19, 14)
myframe <- data.frame(months, temperature) # creating a new data frame
library(tidyverse)
glimpse(myframe)
## Rows: 12
## Columns: 2
## $ months <chr> "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "S…
## $ temperature <dbl> 10, 12, 22, 32, 35, 30, 33, 28, 29, 25, 19, 14
library(ggplot2)
ggplot(myframe, aes(x=months, y=temperature, group=1))+
geom_line(col='blue')+
geom_point(col='red')+
ggtitle('Temperature of months')+
scale_x_discrete(limits = month.abb) # this will order months on the x axis
p + scale_x_discrete(limits=c("D0.5", "D2"))
## Warning: Removed 1 rows containing missing values (position_stack).
df2 <- data.frame(supp=rep(c("VC", "OJ"), each=3),
dose=rep(c("D0.5", "D1", "D2"),2),
len=c(6.8, 15, 33, 4.2, 10, 29.5))
ggplot(data=df2, aes(x=dose, y=len, fill=supp)) +
geom_bar(stat="identity", position=position_dodge())+
geom_text(aes(label=len), vjust=1.6, color="white",
position = position_dodge(0.9), size=3.5)+
scale_fill_brewer(palette="Paired")+
theme_minimal()
# Stacked barplot with multiple groups
ggplot(data=df2, aes(x=dose, y=len, fill=supp)) +
geom_bar(stat="identity")
# Use position=position_dodge()
ggplot(data=df2, aes(x=dose, y=len, fill=supp)) +
geom_bar(stat="identity", position=position_dodge())
# Change the colors manually
p <- ggplot(data=df2, aes(x=dose, y=len, fill=supp)) +
geom_bar(stat="identity", color="black", position=position_dodge())+
theme_minimal()
# Use custom colors
p + scale_fill_manual(values=c('#999999','#E69F00'))
# Use brewer color palettes
p + scale_fill_brewer(palette="Blues")
#install.packages(c("tidyverse", "gapminder", "MetBrewer"))
libraries
library(tidyverse)
library(gapminder)
# install.packages('MetBrewer')
library(MetBrewer)
Draw a scatter plot with GDP per capita on the x-axis and life expectancy (LE) on the y-axis. The numerical variable Population controls the size of each point.
plot <- gapminder %>%
filter (year==2007) %>%
ggplot()+
labs(x= 'GDP per Capita',
y= 'Life Expectancy',
color= 'Population in millions',
size='Population in millions')+
theme_minimal()
plot+ geom_point(aes(gdpPercap, lifeExp, size= pop/1000000))
To use color in the plot, assign the Population variable to the color aesthetic. Since nothing is specified, ggplot2 chooses a color spectrum for this numerical variable (shades of blue).
plot + geom_point(aes(gdpPercap, lifeExp, size= pop/1000000, color= pop/1000000))
To control the color spectrum, we need to introduce a color scale. In
the following plot, we have to provide a vector of hex color values. You
would choose this if you got your colors from one of the mentioned above
websites.
plot + geom_point(aes(gdpPercap, lifeExp, size= pop/1000000, color= pop/1000000))+
scale_color_gradientn(colors = c("#003049", "#D62828", "#F77F00", "#FCBF49", "#EAE2B7"))
To apply one of the MetBrewer palettes, replace the hex-vector with a MetBrewer function. Within the function call, you provide the palette’s name, then several colors, and tell it that we need a continuous palette since it is a numerical variable.
plot + geom_point(aes(gdpPercap, lifeExp, size= pop/1000000, color= pop/1000000))+
scale_color_gradientn(colors = met.brewer('Cross', n=500, type = 'continuous'))
You might also want to use color palettes with non-numerical variables. Let us assume we want to apply color to the Continent variable. This implies using a manual color scale and providing a MetBrewer palette.
plot + geom_point(aes(gdpPercap, lifeExp, size= pop/1000000, color= continent))+
scale_color_manual(values = met.brewer('Navajo', 5))
Please note: if you want to apply color to the fill aesthetic rather than the color aesthetic, use the scale_fill_manual function instead of scale_color_manual. This is useful for boxplots or bar charts.
box <- gapminder %>%
filter(gdpPercap< 60000) %>%
ggplot(aes(continent, gdpPercap, color= year, fill= continent))+
geom_boxplot()+
theme_minimal()+ labs( x= 'Continent', y= 'GDP per Capita', fill= 'Continent')
# Nectar pH by microbe treatment. The factor levels are set at creation so
# the bars appear in this order instead of alphabetically.
df <- data.frame(
  Names = factor(
    c('Bacteria', 'Yeast', 'None'),
    levels = c('Bacteria', 'Yeast', 'None')
  ),
  Quantity = c(2.5, 5.5, 7.5)
)
library(ggplot2)
library(tidyverse)
ggplot(df, aes(Names, Quantity, fill = Names)) +
  geom_bar(stat = 'identity') +
  scale_fill_manual(values = c('#110a62', '#fcd749', '#b5b4b5')) +
  labs(y = 'Nectar pH', x = 'Microbe added to nectar') +
  theme_classic() +
  # all theme tweaks in one call: no legend, no x axis line or x ticks,
  # large black axis text and thick, long tick marks
  theme(
    legend.position = 'none',
    axis.ticks.x = element_blank(),
    axis.text = element_text(size = 22, color = 'black'),
    axis.line.x = element_blank(),
    axis.ticks = element_line(size = 1, color = "black"),
    axis.ticks.length = unit(.5, "cm"),
    text = element_text(size = 22)
  )
x11() # open a new window for graphics
graphics.off() # close all open graphics devices, including the new window
Normal distribution, also known as the Gaussian distribution, is a probability distribution that is symmetric about the mean, showing that data near the mean are more frequent in occurrence than data far from the mean.
library(tidyverse)
# Histogram of simulated heights with the theoretical normal curve on top.
n <- 1000
height_mean <- 170 # cm; named so it does not mask base::mean()
height_sd <- 6.35 # cm; named so it does not mask stats::sd()
binwidth <- 0.3
set.seed(1234)
df <- data.frame(x = rnorm(n, height_mean, height_sd))
# Only `x` is a real aesthetic; the constants are used directly in the layers.
ggplot(df, aes(x = x)) +
  theme_bw() +
  geom_histogram(
    binwidth = binwidth,
    colour = "white", fill = "lightblue", size = 0.1
  ) +
  # density * n * binwidth rescales the curve to the histogram's count scale
  stat_function(
    fun = function(x) {
      dnorm(x, mean = height_mean, sd = height_sd) * n * binwidth
    },
    color = "darkred", size = 1
  )
dice <- 1:6
# Roll one die: returns a single value drawn at random from `dice`.
# `x` is kept so existing calls still work; it is not used.
myluck <- function(x) {
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  sample(dice, size = 1, replace = TRUE)
}
myluck()
## [1] 2
names <- c('saneesh', 'appu', 'sanusha')
# Pick one person at random from `names`.
# `x` is kept so existing calls still work; it is not used.
who <- function(x) {
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  sample(names, size = 1, replace = TRUE)
}
who()
## [1] "saneesh"
df <- data.frame(name=as.factor(c('James Bond', 'Spider Man', 'Iron Man')))
# df <- df %>% separate(name, c('Genus', 'Species'), sep = '([ ])')
# Split the `name` column of `df` at the space into `Genus` and `Species`,
# print the resulting data frame and return it invisibly.
shorten <- function(df) {
  print(separate(df, name, into = c('Genus', 'Species'), sep = '([ ])'))
}
shorten(df)
## Genus Species
## 1 James Bond
## 2 Spider Man
## 3 Iron Man
to apply to every chunk in the file
inside the chunk write
knitr::opts_chunk$set(include= ,echo = , message= , warning= )
# knitr::opts_chunk$set(message = TRUE, echo = TRUE, warning = TRUE)
include: to show or hide code and results from
appearing
echo: to show or hide code in the output but shows
result
message to hide or show the messages generated by the
code
warning: to show or hide warning generated by the code
these options can be written for individual chunks as well
## [1] 5
# Heading 1
## Heading 2 ### Heading 3
italics
italic
bold
bold
plot() to show r code/function
@Saneesh
this is a blockquote
— Saneesh
hello
\(by\)
\(\mu\)
\(\sum\)
\(a\pm b\)
\(x=y\)
\(x>y\)
\(x^2\)
\(x\le y\)
\(\sum_{n=1}^{10} n^2\)
\(LUI_i=\frac12(gi/gm)+\frac12(ti/tm)\)
\(x_{1}+ x_{2}+\cdots+x_{n}\)
\(|A|\)
\(A\subset B\)
\(A \subseteq B\)
\(A \cup B\)
\(A \cap B\)
\(P(A|B)\)
\(\alpha\)
\(\beta\)
\(\gamma\)
\(\theta\)
\(H_2O\)
using knitr::kable()
| Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|---|---|---|---|---|
| 6.3 | 3.3 | 6.0 | 2.5 | virginica |
| 6.3 | 2.9 | 5.6 | 1.8 | virginica |
| 6.3 | 2.7 | 4.9 | 1.8 | virginica |
| 6.3 | 2.8 | 5.1 | 1.5 | virginica |
| 6.3 | 3.4 | 5.6 | 2.4 | virginica |
| 6.3 | 2.5 | 5.0 | 1.9 | virginica |